home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_100
/
185_01
/
ssort.c
< prev
next >
Wrap
Text File
|
1985-08-19
|
22KB
|
880 lines
/*
* SSORT.C - Selecta-SORT
* version 1.0
* This is a modified version of LEXSORT which permits a command line
* option to select the collating sequence from a "magic" file.
* The name was changed only due to the fact that LEXSORT was no longer
* (if it ever was) appropriate. LEXSORT.LBR should be retired.
*
* Differences from LEXSORT.C:
*
* The only changed functions were options() and usage(). Two functions
* were added: get_collating_sequence(), collate_file().
* Four #defines were added: INPUT, ABSOLUTE, FUDGE, COLLATE_NAME.
* The signon message was changed. Globals were not modified.
*
* Harvey Moran 2/26/84
*
* To build with BDS C vers 1.5:
* cc ssort -o -e 3800
* casm lexlate
* asm lexlate.DDz ;choose drives (D) to suit
* ddt
* f100,1000,0 (not necessary, makes file compares of '.crl' meaningful)
* iLEXLATE.HEX
* g0
* save 4 lexlate.crl
* l2 ssort lexlate
*
* Usage:
*
* SSORT <infile> <outfile> [-c<index 0 to n>] [-k<key field selections>]
*
* -c<n> selects the n'th sort order precedence table from the
* "magic" file SSORT.OVL to be overlayed into lexlate(). See
* SORTORDR.ASM for the definition of these tables. ASseMble and LOAD
* SORTORDR.ASM, then rename to SSORT.OVL.
* If -c is not used, the default remains the same lexicographical
* increasing order sort from LEXSORT (or whatever you "wire" into
* LEXLATE.CRL).
*
* All other command line syntax remains the same as LEXSORT.
*
* Harvey Moran 2/26/84
*
* =====================================================================
* LEXSORT.C (modified SORT3.C for lexicographical ordering) HRM 11/6/83
* version 1.0
* To build with BDS C vers 1.5:
* cc lexsort -o -e 3400
* casm lexlate
* asm lexlate.DDz ;choose drives (D) to suit
* ddt
* f100,1000,0 (not necessary, makes file compares of '.crl' meaningful)
* iLEXLATE.HEX
* g0
* save 4 lexlate.crl
* l2 lexsort lexlate
*
* Usage: lexsort <infile> <outfile> [-k<sort key list>]
*
* where <sort key list> is:
*
* A comma separated list of column numbers or ranges
* specifying the sort key positions.
* e.g.
* lexsort messy.dat neat.dat -k3-5,7-9,1-2,12
*
* specifies that:
* the input file is MESSY.DAT
* the output file is NEAT.DAT
*
* The primary sort key is columns 3 thru 5
* The first secondary sort key is columns 7 thru 9
* The next secondary sort key is columns 1 thru 2
* The last secondary sort key is columns 12 thru end of line
*
* A sort key of 1 column may be specified as 3-3 for example.
* A sort key which goes to end of line need NOT be the last one.
*
* The leftmost column is numbered 1.
* The default sort key is the entire line.
*
* Implementation note:
*
* LEXLATE.CSM contains function lexlate() which determines the
* character ordering for the character set. This concept includes
* the notion of totally IGNOREing some characters. The LEXLATE.CSM
* provided IGNOREs all characters but space, A-Z, a-z, 0-9. It also
* treats 'A' as less than but adjacent to 'a', etc. If a specified
* key field consists entirely of IGNORE characters, it is considered
* the lowest order for that key. A line which has no entry for the
* specified key field (because it is too short) will be the next lowest
* order in the sort for that field.
*
* Derived from: SORT3.C by H.R. Moran, Jr. 11/5/83
* changes made:
* 1) Re-format, indent, and comment to suit me, including deletion
* of some extraneous declarations and code lines.
* 2) Re-write compar(), and include use of lexlate()
* 3) Add options() to allow key field selections.
*
* SORT3.C comments follow:
* ---------------------------------------------------------------------------
* Sort/Merge from Software Tools
*
* Last Modified : 21 September 1982
*
* Converted from Software Tools RATFOR to BDS C by Leor Zolman
* Sep 2, 1982
*
* Usage: sort <infile> <outfile>
*
* Main variables have been made external; this is pretty much in
* line with the RATFOR call-by-name convention anyway.
*
* Requires lots of disk space, up to around three times the length
* of the file being sorted. This program is intended for files
* bigger than memory; simpler, faster sorts can be implemented for
* really short files (like ALPH.C)
* -leor
*
* Compile & Link:
* A>cc sort.c -e2800 -o
* A>l2 sort
* (or...)
* A>cc sort.c -e2900 -o
* A>clink sort
*
*/
#include <bdscio.h>
/*
 * Compile-time configuration and convenience macros.
 */
/* Banner that main() prints with puts() at startup */
#define SIGNON "ssort version 1.0 - 2/26/84 - hrm\n"
#define FUDGE 0xe /* offset into the lexlate function for table address */
/* Expands to a call to collate_file(), which is defined outside this part of the file */
#define COLLATE_NAME collate_file()
/* #define DEBUG */ /* enables debug printout when defined */
#define BOOL int /* BOOlean */
#define PROC int /* PROCedure */
#define TRIAD int /* -1, 0, or +1 */
#define IGNORE 0xff /* lexlate() token to ignore the character */
#define VERBOSE 1 /* give running account of file activity */
#define MAXLINE 200 /* longest line we want to deal with */
#define NBUFS 7 /* Max number of open buffered files */
#define MAXPTR 3000 /* Max number of lines (set for dict) */
#define MERGEORDER (NBUFS-1) /* Max # of intermediate files to merge */
#define NAMESIZE 20 /* Max Filename size */
/*
 * NOTE(review): log2(3000) is roughly 11.6, so 12 would already satisfy the
 * bound stated below; 13 leaves one spare level. Confirm how LOGPTR is used
 * before changing it.
 */
#define LOGPTR 13 /* smallest value >= log (base 2) of MAXPTR */
#define EOS '\0' /* string termination character */
/* FILE is shorthand for struct _buf; the struct is not declared in this part of the file (presumably it comes from bdscio.h -- confirm) */
#define FILE struct _buf
/* Stream argument main() passes to fputs() for progress messages; NOTE(review): presumably the BDS C device number for error output -- confirm */
#define stderr 4
/* fputc is simply another name for putc */
#define fputc putc
/*
 * General working storage shared by the sort and merge phases.
 */
/* Filename-sized scratch buffers (NAMESIZE and NAMESIZE + 10 bytes); their users are outside this part of the file */
char name[NAMESIZE], name2[NAMESIZE + 10];
/* main() uses buffers[0] for the temporary run file and buffers[1] for the input file */
FILE buffers[NBUFS + 1]; /* up to NBUFS general-purp. buffered files */
FILE *infil[MERGEORDER + 1]; /* tmp file ptrs for sort operation */
/*
 * linptr has room for MAXPTR + 1 entries; nlines is what main() hands to
 * quick() after each gtext() call.
 * NOTE(review): presumably linptr[] holds one entry per line currently in the
 * text buffer and nlines is the count of those lines -- confirm in gtext().
 */
unsigned linptr[MAXPTR + 1], nlines;
/* NOTE(review): no use of temp is visible in this part of the file */
int temp;
/* main() sets this to topofmem() - endext() - 500 */
unsigned maxtext; /* max # of chars in main text buffer */
/*
* KEY FIELD selection support variables - hrm
*/
/* Capacity of the two per-field column arrays below */
#define MAXFIELDS 20
/* FIELDS is shorthand for struct _fields */
#define FIELDS struct _fields
/*
 * Key-field selection table; a single global instance, sortfields, is
 * declared here.
 * NOTE(review): judging by the member names, _numfields is the number of key
 * fields in use and _strtcol[i] / _stopcol[i] are the first and last column
 * of field i -- confirm in options() and compar(), which are outside this view.
 */
FIELDS {
int _numfields;
int _strtcol[MAXFIELDS];
int _stopcol[MAXFIELDS];
} sortfields;
/*
 * FASTLOCAL is defined unconditionally here, so the struct below is always
 * compiled; remove the #define to drop it.
 */
#define FASTLOCAL
#ifdef FASTLOCAL
/*
* FAST locals for compar()
*
* Working variables for compar() held in one global struct named z.
* NOTE(review): presumably kept global because statically addressed
* variables are cheaper than stack locals under BDS C -- confirm.
* The role of each member is defined by compar(), which is outside
* this part of the file.
*/
struct {
unsigned i, j;
unsigned k, l;
unsigned x1, x2;
unsigned len_i, len_j;
char c1, c2;
} z;
#endif
/*
 * Debug-only 200-byte buffer; it exists only when DEBUG is defined
 * (the #define DEBUG near the top of the file is commented out).
 * Its users are outside this part of the file.
 */
#ifdef DEBUG
char spacebuffer[200];
#endif
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
/*!!!!!!!!!!!!!!!! WARNING (H.Moran) !!!!!!!!! */
/*! The algorithm INSISTS that THIS name be the LAST global ! */
/*! ! */
char *linbuf; /* text area starts after this variable */
/* */
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
/*
 * main() sets linbuf = endext() and sizes the text area as
 * topofmem() - endext() - 500 (stored in maxtext).
 * NOTE(review): presumably endext() reports the end of the external
 * (global) data area, which is why a global declared after linbuf
 * would overlap the text buffer -- confirm against the BDS C runtime docs.
 */
main(argc, argv)
int argc;
char *argv[];
{
int min(), gtext();
FILE *infile, *outfile; /* main input and output streams */
FILE *tmpfile;
unsigned high, lim, low;
BOOL more_text;
puts(SIGNON);
linbuf = endext(); /* start of text buffer area */
maxtext = topofmem() - endext() - 500;
tmpfile = buffers[0];
options(&argc, argv); /* process command line options */
if ( argc != 3 ) {
usage("");
}
infile = buffers[1];
if ( fopen(argv[1], infile) == ERROR ) {
puts("Can't open ");
puts(argv[1]);
exit(-1);
}
#if VERBOSE
fputs("Beginning initial formation run\n", stderr);
#endif
high = 0; /* Initial formation of runs: */
do {
more_text = gtext(infile);
quick(nlines);
high++;
makfil(high, tmpfile);
ptext(tmpfile);
fclout(tmpfile);
} while ( more_text );
fclose(infile); /* free up the input file buffer */
#if VERBOSE
fputs("Beginning merge operation\n", stderr);
#endif
for ( low = 1; low < high; low += MERGEORDER ) { /* merge */
lim = min(low + MERGEORDER - 1, high);
gopen(low, lim); /* open files */
high++;
makfil(high, tmpfile);
merge(lim - low + 1, tmpfile);
fclout(tmpfile); /* terminate, flush and close file */
gremov(low, lim);
}
/*
* Now write the sorted output file:
*/
#if VERBOSE
fputs("Merge c